package com.siyoumi.util;

import lombok.Getter;
import lombok.extern.slf4j.Slf4j;

import java.util.*;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Sensitive-word filter backed by a character trie (DFA-style matching).
 *
 * <p>Each trie node is a map from a one-character string to its child node. A node that
 * terminates a word additionally holds a {@link #TREE_END_KEY} entry whose value is a small
 * metadata map ({@link #WORD_VALUE} = the word, {@link #WORD_LENGTH} = its length).
 *
 * <p>NOTE(review): only the root map is a {@link ConcurrentHashMap}; the word set and the nested
 * nodes are plain {@code HashSet}/{@code HashMap}. Concurrent calls to {@link #addWords} /
 * {@link #removeWords} therefore look unsafe — confirm that callers serialize mutations.
 */
@Slf4j
public class XFinder {
    // All registered sensitive words; used to skip words that were already added.
    private final Set<String> WORDS = new HashSet<>();
    // Character trie generated from the sensitive words.
    @Getter
    private final Map<String, Map> TREE = new ConcurrentHashMap<>();
    /* Key that marks the end of a word inside a trie node. */
    public static final String TREE_END_KEY = "^";
    // Metadata key: the sensitive word itself.
    public static final String WORD_VALUE = "v";
    // Metadata key: the length of the sensitive word.
    public static final String WORD_LENGTH = "l";
    // Default replacement character.
    public static final char DEFAULT_REPLACEMENT = '*';
    // Default opening tag used to mark a hit.
    public static final String DEFAULT_START_TAG = "<font color=\"red\">";
    // Default closing tag used to mark a hit.
    public static final String DEFAULT_END_TAG = "</font>";

    /**
     * Marks every sensitive word in the text with the default red HTML {@code font} tag.
     *
     * @param text text to scan
     * @return the text with hits wrapped in the default tags; {@code null}/blank text is returned as is
     */
    public String filter(String text) {
        return findMark(TREE, text, DEFAULT_START_TAG, DEFAULT_END_TAG);
    }

    /**
     * Checks that the text contains no sensitive word.
     *
     * @param text text to scan; {@code null} is treated as containing no sensitive word
     * @return a result created with code 20047 whose {@code "word"} data entry holds the first hit;
     *         when nothing is found the error code is reset to 0 (presumably "success" — confirm
     *         against {@code XReturn})
     */
    public XReturn findNotExists(String text) {
        String word = findExists(TREE, text, 0);
        XReturn r = XReturn.getR(20047);
        r.setData("word", word);

        if (XStr.isNullOrEmpty(word)) {
            r.setErrCode(0);
        }

        return r;
    }

    /**
     * Marks every sensitive word in the text with the given tags.
     *
     * @param text     text to scan
     * @param startTag text inserted before each hit
     * @param endTag   text inserted after each hit
     * @return the marked text; {@code null}/blank text is returned as is
     */
    public String filter(String text, String startTag, String endTag) {
        return findMark(TREE, text, startTag, endTag);
    }

    /**
     * Marks every sensitive word of {@code tree} found in {@code text}.
     *
     * @param tree     trie to match against; {@code null} means "no words"
     * @param text     text to scan
     * @param startTag text inserted before each hit; {@code null} is treated as empty
     * @param endTag   text inserted after each hit; {@code null} is treated as empty
     * @return the marked text; {@code null}/blank text is returned unchanged
     */
    public static String findMark(Map<String, Map> tree, String text, String startTag, String endTag) {
        if (tree == null || text == null || text.trim().isEmpty()) {
            return text;
        }
        // Null tags used to be rendered as the literal "null"; treat them as "no tag" instead.
        String open = startTag == null ? "" : startTag;
        String close = endTag == null ? "" : endTag;
        StringBuilder sb = new StringBuilder(text.length() + 16);
        findMark(tree, text, 0, open, close, sb);
        return sb.toString();
    }

    /**
     * Scans the text position by position and writes the marked result into {@code sb}.
     * Overlapping hits are merged into a single tagged span.
     *
     * @param tree     trie to match against
     * @param text     text to scan
     * @param index    position to start scanning from
     * @param startTag opening tag (non-null)
     * @param endTag   closing tag (non-null)
     * @param sb       output buffer
     */
    private static void findMark(Map<String, Map> tree, String text, int index, String startTag, String endTag, StringBuilder sb) {
        // Exclusive end of the text already emitted as part of a tagged span.
        int last = 0;
        int textLen = text.length();
        while (index < textLen) {
            String word = findMaxWord(tree, text, index);
            if (!word.isEmpty()) {
                int wordEnd = index + word.length();
                if (index >= last) {
                    // Hit starts outside the previous span: open a new tagged span.
                    sb.append(startTag).append(word).append(endTag);
                } else if (last < wordEnd) {
                    // Hit overlaps the previous span and reaches past it: the buffer currently
                    // ends with endTag, so slip the extra characters in just before that tag.
                    sb.insert(sb.length() - endTag.length(), text, last, wordEnd);
                }
                // Never move backwards: a shorter hit nested inside the previous span must not
                // shrink the covered range, otherwise covered characters are emitted twice.
                last = Math.max(last, wordEnd);
            } else if (index >= last) {
                sb.append(text.charAt(index));
            }
            index++;
        }
    }

    /**
     * Returns the first sensitive word found in the text.
     *
     * @param tree  trie to match against
     * @param text  text to scan; may be {@code null}
     * @param index position to start scanning from
     * @return the first hit, or {@code null} when there is none
     */
    private static String findExists(Map<String, Map> tree, String text, int index) {
        if (text == null) {
            return null;
        }
        int textLen = text.length();
        for (int i = index; i < textLen; i++) {
            String word = findMaxWord(tree, text, i);
            if (XStr.hasAnyText(word)) {
                return word;
            }
        }
        return null;
    }

    /**
     * Finds the longest sensitive word that starts exactly at {@code start}.
     *
     * @param tree  trie root
     * @param text  text to scan
     * @param start position in {@code text} where the word must begin
     * @return the longest matching word, or an empty string when none starts at {@code start}
     */
    private static String findMaxWord(Map<String, Map> tree, String text, int start) {
        String longest = "";
        Map<String, Map> node = tree;
        int textLen = text.length();
        for (int i = start; i < textLen; i++) {
            String ch = text.substring(i, i + 1);
            if (TREE_END_KEY.equals(ch)) {
                // The end marker is metadata, not an edge. Following it would treat the
                // word-metadata map as a trie node and fail with a ClassCastException.
                break;
            }
            Map<String, Map> child = node.get(ch);
            if (child == null) {
                break;
            }
            Map<String, Object> end = child.get(TREE_END_KEY);
            if (end != null) {
                // This node terminates a word; keep it if it is the longest so far.
                String sensitiveWord = (String) end.get(WORD_VALUE);
                if (longest.length() < sensitiveWord.length()) {
                    longest = sensitiveWord;
                }
            }
            node = child;
        }
        return longest;
    }

    /**
     * Rejects words that contain the end marker character.
     *
     * @param wordArr words to validate; {@code null} array and {@code null} elements are ignored
     * @throws RuntimeException when a word contains {@link #TREE_END_KEY}
     */
    private static void checkWord(String... wordArr) {
        if (wordArr == null) {
            return;
        }
        for (String word : wordArr) {
            if (word != null && word.contains(TREE_END_KEY)) {
                throw new RuntimeException("包含非法字符：" + TREE_END_KEY);
            }
        }
    }

    /**
     * Registers sensitive words. Words are trimmed; null, empty and already-known words are skipped.
     *
     * @param wordArr words to add; a {@code null} array is treated as empty
     * @throws RuntimeException when a word contains {@link #TREE_END_KEY} or is longer than 1024 characters
     */
    public void addWords(String... wordArr) {
        checkWord(wordArr);

        if (wordArr != null) {
            for (String word : wordArr) {
                if (XStr.isNullOrEmpty(word)) {
                    continue; // nothing to add
                }
                word = word.trim();
                if (word.isEmpty()) {
                    // A whitespace-only word would put an end marker on the trie root.
                    continue;
                }
                if (word.length() > 1024) {
                    throw new RuntimeException("敏感词太长[最长1024]!");
                }
                // Only words that were not registered before are inserted into the trie.
                if (WORDS.add(word)) {
                    treeAddWord(TREE, word);
                }
            }
        }
        log.info("当前敏感词数量：{}", WORDS.size());
    }

    /**
     * Unregisters sensitive words and rebuilds the trie from the remaining ones.
     *
     * @param wordArr words to remove (trimmed before lookup); a {@code null} array is treated as empty
     */
    public void removeWords(String... wordArr) {
        if (wordArr != null) {
            for (String word : wordArr) {
                if (XStr.hasAnyText(word)) {
                    WORDS.remove(word.trim());
                }
            }
        }
        // Rebuild straight from WORDS. Going through addWords() would insert nothing, because
        // every remaining word is already in WORDS and Set.add() returns false for it.
        TREE.clear();
        for (String word : WORDS) {
            treeAddWord(TREE, word);
        }
        log.info("当前敏感词数量：{}", WORDS.size());
    }

    /**
     * Splits the word into characters and inserts it into the trie.
     *
     * @param tree trie root
     * @param word sensitive word
     */
    private static void treeAddWord(Map<String, Map> tree, String word) {
        treeAddWord(tree, word, 0);
    }

    /**
     * Recursive insertion step: adds the suffix of {@code word} starting at {@code index} below
     * {@code tree}. A new child is linked into its parent only after it has been fully built.
     *
     * @param tree  node to insert below
     * @param word  sensitive word
     * @param index position of the next character to insert
     * @return {@code tree}, for chaining
     */
    private static Map<String, Map> treeAddWord(Map<String, Map> tree, String word, int index) {
        if (index == word.length()) {
            tree.put(TREE_END_KEY, treeGenerateWordMap(word));
            return tree;
        }
        String next = word.substring(index, index + 1);
        Map<String, Map> subTree = tree.get(next);
        if (subTree == null) {
            subTree = new HashMap<>();
        }
        tree.put(next, treeAddWord(subTree, word, index + 1));
        return tree;
    }

    /**
     * Builds the metadata map stored under {@link #TREE_END_KEY}.
     *
     * @param word sensitive word
     * @return map holding the word ({@link #WORD_VALUE}) and its length ({@link #WORD_LENGTH})
     */
    private static Map<String, Object> treeGenerateWordMap(String word) {
        Map<String, Object> wordMap = new HashMap<>();
        wordMap.put(WORD_VALUE, word);
        wordMap.put(WORD_LENGTH, word.length());
        return wordMap;
    }
}
